Skip to content

FP8 alloc and zero impl#98

Open
guoqingbao wants to merge 1 commit into EricLBuehler:dev_main from
guoqingbao:candle-eric
Open

FP8 alloc and zero impl#98
guoqingbao wants to merge 1 commit into EricLBuehler:dev_main from
guoqingbao:candle-eric

Conversation

@guoqingbao
Copy link

Alloc and zero implementations for FP8. This is necessary if we want to create an empty FP8 KV cache in mistral.rs.

@sempervictus
Copy link

Looks like this is already supported upstream (the allocs and zero, that is) — cherry-picking this commit onto HF's Candle gives us:

$ git diff origin/main 
diff --git c/candle-core/src/cuda_backend/device.rs w/candle-core/src/cuda_backend/device.rs
index a8a43121..2ac9eb6b 100644
--- c/candle-core/src/cuda_backend/device.rs
+++ w/candle-core/src/cuda_backend/device.rs
@@ -1,4 +1,5 @@
-use crate::backend::BackendDevice;
+use super::{CudaError, CudaStorage, CudaStorageSlice, WrapErr};
+use crate::backend::{BackendDevice, BackendStorage};
 use crate::{CpuStorage, CpuStorageRef, DType, Layout, Result, Shape};
 pub use candle_kernels as kernels;
 pub use cudarc;
@@ -8,8 +9,6 @@ use half::{bf16, f16};
 use std::collections::HashMap;
 use std::sync::{Arc, Mutex};
 
-use super::{CudaError, CudaStorage, CudaStorageSlice, WrapErr};
-
 /// Unique identifier for cuda devices.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct DeviceId(usize);
@@ -346,6 +345,12 @@ impl BackendDevice for CudaDevice {
             DType::F8E4M3 => {
                 let data = self.alloc_zeros::<F8E4M3>(elem_count)?;
                 CudaStorageSlice::F8E4M3(data)
+                // return Err(CudaError::InternalError("F8E4M3 not supported in CUDA backend").into())
+            }
+            DType::F6E2M3 | DType::F6E3M2 | DType::F4 | DType::F8E8M0 => {
+                return Err(
+                    CudaError::InternalError("Dummy types not supported in CUDA backend").into(),
+                )
             }
         };
         Ok(CudaStorage {
@@ -465,6 +470,12 @@ impl BackendDevice for CudaDevice {
             DType::F8E4M3 => {
                 let data = self.alloc::<F8E4M3>(elem_count)?;
                 CudaStorageSlice::F8E4M3(data)
+                // return Err(CudaError::InternalError("F8E4M3 not supported in CUDA backend").into())
+            }
+            DType::F6E2M3 | DType::F6E3M2 | DType::F4 | DType::F8E8M0 => {
+                return Err(
+                    CudaError::InternalError("Dummy types not supported in CUDA backend").into(),
+                )
             }
         };

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants